Python Geospatial Data Analysis

Import Libraries¶

In [27]:
# Standard library imports
import math

# Third-party library imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster, HeatMap

Import Data¶

In [5]:
# Load the data from the provided URL into a pandas DataFrame
df = pd.read_csv('https://data.boston.gov/dataset/6220d948-eae2-4e4b-8723-2dc8e67722a3/resource/e86f8e38-a23c-4c1a-8455-c8f94210a8f1/download/tmpf_uzkqpk.csv')

# Display the first 6 rows of the DataFrame to get a quick overview of the data
df.head(6)
Out[5]:
INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK HOUR UCR_PART STREET Lat Long Location
0 I192074715 2629 Harassment HARASSMENT B2 278 NaN 2018-01-01 00:00:00 2018 1 Monday 0 Part Two HARRISON AVE 42.331538 -71.080157 (42.33153805, -71.08015661)
1 I192068538 1107 Fraud FRAUD - IMPERSONATION D14 794 NaN 2018-01-01 00:00:00 2018 1 Monday 0 Part Two GLENVILLE AVE 42.349780 -71.134230 (42.34977988, -71.13423049)
2 I192005657 2610 Other TRESPASSING C11 396 NaN 2018-01-01 00:00:00 2018 1 Monday 0 Part Two MELBOURNE ST 42.291093 -71.065945 (42.29109287, -71.06594539)
3 I192075335 3208 Property Lost PROPERTY - MISSING D4 132 NaN 2018-01-01 00:00:00 2018 1 Monday 0 Part Three COMMONWEALTH AVE 42.353522 -71.072838 (42.35352153, -71.07283786)
4 I192013179 619 Larceny LARCENY ALL OTHERS C11 360 NaN 2018-01-01 00:00:00 2018 1 Monday 0 Part One CENTERVALE PARK 42.296323 -71.063569 (42.29632282, -71.06356881)
5 I182072846 617 Larceny LARCENY THEFT FROM BUILDING C11 353 NaN 2018-01-01 00:00:00 2018 1 Monday 0 Part One FREEPORT ST 42.301499 -71.050712 (42.30149875, -71.05071215)

Generate Base Map¶

A BASE MAP OF BOSTON AREA

In [6]:
# Define the latitude and longitude coordinates for Boston
boston = (42.358443, -71.05977)

# Create a folium map centered around Boston
m = folium.Map(location=boston, title='Stamen terrain', zoom_start=12)

# Display the map
m
Out[6]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Mark Crime Scenes¶

A MAP SHOWING DISTRICTS WITH THE HIGHEST CRIME RATES

In [7]:
# Create a MarkerCluster to group markers together for efficient map rendering
mc = MarkerCluster()

# Iterate through the rows of the DataFrame 'df'
# 'idx' is the index of the row, and 'row' is the data in the row as a Pandas Series
for idx, row in df.iterrows():
    # Check if the 'Long' and 'Lat' values in the current row are not NaN (not missing)
    if not math.isnan(row['Long']) and not math.isnan(row['Lat']):
        # If the 'Long' and 'Lat' values are valid, add a folium.Marker to the MarkerCluster
        # with the latitude and longitude extracted from the current row
        mc.add_child(folium.Marker([row['Lat'], row['Long']]))

# Add the MarkerCluster 'mc' to the folium map 'm'
m.add_child(mc)
Out[7]:
Make this Notebook Trusted to load map: File -> Trust Notebook

View Districts with Highest Crime Rates¶

In [8]:
# Group the 'df' DataFrame by multiple columns: 'DISTRICT', 'STREET', 'REPORTING_AREA', 'Lat', and 'Long'
# Then, calculate the sum of all other numerical columns for each group
# The result is a new DataFrame named 'crime' with the grouped data
# The 'reset_index()' method is used to reset the index of the resulting DataFrame
crime = df.groupby(['DISTRICT', 'STREET', 'REPORTING_AREA', 'Lat', 'Long']).sum().reset_index()
In [9]:
# Comment:
# The following two lines of code modify the 'DISTRICT' and 'REPORTING_AREA' columns in the 'crime' DataFrame.
# It updates the existing values in these columns by adding custom labels to improve data clarity.

# Add custom label 'District:' to the 'DISTRICT' column
crime['DISTRICT'] = crime['DISTRICT'].map('District:{}'.format)

# Add custom label 'Reports:' to the 'REPORTING_AREA' column
crime['REPORTING_AREA'] = crime['REPORTING_AREA'].map('Reports:{}'.format)

HEATMAP SHOWING CRIME RATE

In [18]:
# Create a folium map centered around 'boston' with 'StamenToner' tiles and zoom level 12
m2 = folium.Map(location=boston, tiles='StamenToner', zoom_start=12)

# Create a HeatMap layer using the 'crime' DataFrame's 'Lat' and 'Long' columns as data points
# and set the radius of the heatmap points to 15
HeatMap(data=crime[['Lat', 'Long']], radius=15).add_to(m2)

# Define a function to plot individual circle markers for each row in the 'crime' DataFrame
def plotDot(point):
    folium.CircleMarker(location=[point.Lat, point.Long],
        radius=5,
        weight=2,
        popup=f"Crime Type: {point['OFFENSE_CODE_GROUP']}<br>District: {point['DISTRICT']}<br>Reporting Area: {point['REPORTING_AREA']}",
        fill_color='#000000').add_to(m2)

# Apply the 'plotDot' function to each row of the 'crime' DataFrame
crime.apply(plotDot, axis=1)

# Fit the map 'm2' to the bounds of the data points to ensure all points are visible on the map
m2.fit_bounds(m2.get_bounds())

# Display the folium map 'm2' with the HeatMap and circle markers
m2
Out[18]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Medical Assistance Analysis¶

A MAP SHOWING REQUIRED MEDICAL ASSISTANCE

In [20]:
# The code below filters the DataFrame 'df' to create a new DataFrame 'med' containing only the rows
# where the 'OFFENSE_CODE_GROUP' is 'Medical Assistance', and selects the 'Lat' and 'Long' columns.

# Filter the DataFrame to create 'med' DataFrame for 'Medical Assistance' incidents
med = df.loc[df.OFFENSE_CODE_GROUP == 'Medical Assistance'][['Lat', 'Long']]

# The code below fills missing values in the 'Lat' and 'Long' columns of the 'med' DataFrame with 0.

# Fill missing values in 'Lat' and 'Long' columns of 'med' DataFrame with 0
med.Lat.fillna(0, inplace=True)
med.Long.fillna(0, inplace=True)

# Create a folium map centered around 'boston' with 'openstreetmap' tiles and zoom level 11
m6 = folium.Map(location=boston, tiles='openstreetmap', zoom_start=11)

# Create a HeatMap layer using the 'Lat' and 'Long' columns from the 'med' DataFrame
# and set the radius of the heatmap points to 16
HeatMap(data=med, radius=16).add_to(m6)

# Display the folium map 'm6' with the HeatMap
m6
Out[20]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Crimes Commited¶

A BAR GRAPH SHOWING COMMITTED CRIMES

In [29]:
# Create a categorical plot (catplot) to count occurrences of each 'OFFENSE_CODE_GROUP'
plot = sns.catplot(
    y='OFFENSE_CODE_GROUP',
    kind='count',
    height=8,
    aspect=1.5,
    order=df.OFFENSE_CODE_GROUP.value_counts().index,
    data=df
)

# Set the title for the graph
plot.set_axis_labels("Count", "Offense Code Group")
plot.fig.suptitle("Count of Offense Code Groups", y=1.02)

# Adjust the figure layout to avoid overlapping elements
plot.fig.tight_layout()

# Show the plot
plt.show()      
c:\Users\user\AppData\Local\Programs\Python\Python311\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight
  self._figure.tight_layout(*args, **kwargs)
C:\Users\user\AppData\Local\Temp\ipykernel_14740\75682180.py:16: UserWarning: The figure layout has changed to tight
  plot.fig.tight_layout()
No description has been provided for this image

Motor Vehicle Accident Response¶

OPENSTREETMAP SHOWING POSSIBLE CAUSES OF MOTOR VEHICLE ACCIDENTS

In [31]:
# The code below filters the DataFrame 'df' to create a new DataFrame 'mv' containing only the rows
# where the 'OFFENSE_CODE_GROUP' is 'Motor Vehicle Accident Response', and selects the 'Lat' and 'Long' columns.

# Filter the DataFrame to create 'mv' DataFrame for 'Motor Vehicle Accident Response' incidents
mv = df.loc[df.OFFENSE_CODE_GROUP == 'Motor Vehicle Accident Response'][['Lat', 'Long']]


# The code below fills missing values in the 'Lat' and 'Long' columns of the 'mv' DataFrame with 0.
# Fill missing values in 'Lat' and 'Long' columns of 'mv' DataFrame with 0
mv.fillna(0, inplace=True)

# Create a folium map centered around 'boston' with 'openstreetmap' tiles and zoom level 11
m4 = folium.Map(location=boston, tiles='openstreetmap', zoom_start=11)

# Create a HeatMap layer using the 'Lat' and 'Long' columns from the 'mv' DataFrame
# and set the radius of the heatmap points to 16
HeatMap(data=mv, radius=16, tooltip="Motor Vehicle Accident Response").add_to(m4)

# Display the folium map 'm4' with the HeatMap
m4   
Out[31]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Larceny¶

OPENSTREETMAP SHOWING POSSIBLE CAUSES OF LARCENCY

In [32]:
# The code below filters the DataFrame 'df' to create a new DataFrame 'lar' containing only the rows
# where the 'OFFENSE_CODE_GROUP' is 'Larceny', and selects the 'Lat' and 'Long' columns.

# Filter the DataFrame to create 'lar' DataFrame for 'Larceny' incidents
lar = df.loc[df.OFFENSE_CODE_GROUP == 'Larceny'][['Lat', 'Long']]

# The code below fills missing values in the 'Lat' and 'Long' columns of the 'lar' DataFrame with 0.
# Fill missing values in 'Lat' and 'Long' columns of 'lar' DataFrame with 0
lar.fillna(0, inplace=True)

# Create a folium map centered around 'boston' with 'openstreetmap' tiles and zoom level 11
m5 = folium.Map(location=boston, tiles='openstreetmap', zoom_start=11)

# Create a HeatMap layer using the 'Lat' and 'Long' columns from the 'lar' DataFrame
# and set the radius of the heatmap points to 16
HeatMap(data=lar, radius=16, tooltip="Larceny").add_to(m5)

# Display the folium map 'm5' with the HeatMap
m5  
   
Out[32]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: